* Session setup: wipe anything left in memory and switch off the
* --more-- pause so the script runs straight through.
clear all
set more off
* Working directory for the run.
cd "/homes/nber/yasenov"

* Load the matched file. Later comments and labels in this script treat
* *_A variables as 1930 values and *_B variables as 1940 values.
use "~/matched_30_40.dta"

*************************
*** SAMPLE SELECTION ****
*************************
* Age distribution before any restriction (compare with the second -sum-).
sum age_A

* Only records flagged border1930.
keep if border1930 == 1

* Ages 18 through 55; a missing age fails the range test and is removed.
keep if inrange(age_A, 18, 55)

* Birthplace code below 15000, or one of the three listed 900xx codes.
keep if bpl_A < 15000 | inlist(bpl_A, 90011, 90021, 90022)

* Remove anyone whose mother's or father's birthplace is coded 20000.
drop if inlist(20000, mbpl_A, fbpl_A)

* Remove specific labour-force, employment-status, class-of-worker and
* schooling codes (one -drop- each so the log shows each count separately).
drop if labforce_A == 1
drop if empstat_A == 13
drop if classwkr_A == 29
drop if school_A == 2

* Age distribution of the final sample.
sum age_A

* Keep only the variable families used below.
keep statef* sea* county* occ1950* empstat* occsco* urban* migplac* ///
	classwk* marst* lit* race* age* ind1950_A


************************
*** 1910 BOUNDARIES ****
************************
* Build numeric county identifiers as state code * 1000 + county code / 10,
* once for the _A (1930) record and once for the _B (1940) record, then fold
* a fixed list of county codes into another county's code so both years use
* one consistent set of units (per the section title, 1910 boundaries --
* NOTE(review): confirm the crosswalk against the boundary source).
gen county = (statefip_A * 1000) + (county_A / 10)
* One code yields a half-unit identifier after the division; round it up.
replace county = 41061 if county == 41060.5

gen county40 = (statefip_B * 1000) + (county_B / 10)
replace county40 = 41061 if county40 == 41060.5

* Apply the same recodes to both identifiers.
foreach j of varlist county county40 {

	* Arizona
	replace `j' = 4009 if `j' == 4011

	* Louisiana
	replace `j' = 22097 if `j' == 22039
	replace `j' = 22019 if inlist(`j', 22011, 22003, 22053)

	* New Mexico
	replace `j' = 35017 if `j' == 35023
	replace `j' = 35019 if `j' == 35011
	replace `j' = 35005 if `j' == 35025

	* Nevada
	replace `j' = 32013 if `j' == 32027
	replace `j' = 32009 if `j' == 32021

	* Oklahoma
	replace `j' = 40031 if `j' == 40033

	* Oregon
	replace `j' = 41013 if inlist(`j', 41031, 41017)

	* Texas
	replace `j' = 48141 if inlist(`j', 48229, 48109)
	replace `j' = 48137 if `j' == 48385
	* FIX: the second source code was written 42849, which can never occur
	* for a Texas record (state prefix 48), so that recode was silently
	* skipped. 48249 is the intended code (digits transposed).
	replace `j' = 48355 if inlist(`j', 48273, 48249)
	replace `j' = 48061 if `j' == 48489
	replace `j' = 48427 if `j' == 48407

	* Utah
	replace `j' = 49047 if `j' == 49009
	replace `j' = 49051 if `j' == 49013
}

**************
*** MERGE ****
**************

* County-level aggregates, matched on the harmonised 1930 county code.
* keep(1 3) retains master-only and matched rows, so the _merge == 2 tab
* below is expected to report no observations.
merge m:1 county using "~/aggregate_nber.dta", keep(1 3) 
tab county if _merge == 1, sort
tab county if _merge == 2, sort
drop _merge

drop sea statefip pop30
compress

* Occupational wage lookup. The using file is keyed on occ19501930 and
* supplies wage30, so each year's occupation code is temporarily renamed to
* the key, merged, and renamed back; the wage is stored under a
* year-specific name.
rename occ1950_A occ19501930   // this is 1930
merge m:1 occ19501930 using "~/occ_wage.dta"
rename wage30 wage30_1930
tab occ19501930 if _merge == 1, sort
tab occ19501930 if _merge == 2, sort
* FIX: without this, occupations present only in the lookup file were
* appended as extra observations with no person data and were carried into
* the saved file. They are still tabulated above for diagnostics.
drop if _merge == 2
drop _merge
rename occ19501930 occ1950_A

rename occ1950_B occ19501930    // this is 1940
merge m:1 occ19501930 using "~/occ_wage.dta"
rename wage30 wage30_1940
tab occ19501930 if _merge == 1, sort
tab occ19501930 if _merge == 2, sort
drop if _merge == 2
drop _merge
rename occ19501930 occ1950_B

* Blank the occupational wage for occupation codes above 970 (this also
* catches missing codes, which compare as larger than any number).
* FIX: the original referred to wage30 / wage40. After the renames above,
* wage30 is an ambiguous abbreviation and wage40 is never created in this
* script, so both statements failed. Use the renamed variables instead.
replace wage30_1930 = . if occ1950_A > 970
replace wage30_1940 = . if occ1950_B > 970

****************************
*** OCCUPATION GROUPINGS ***
****************************
* Collapse the detailed occupation code of each year into eleven broad
* groups, overwriting the code with the group's representative value.
* Anything above 970 -- including missing, which sorts above every number --
* becomes 999.
foreach yr in A B {
	local v occ1950_`yr'
	replace `v' = 0   if inrange(`v', 0, 99)
	replace `v' = 100 if inrange(`v', 100, 123)
	replace `v' = 200 if inrange(`v', 200, 290)
	replace `v' = 300 if inrange(`v', 300, 390)
	replace `v' = 400 if inrange(`v', 400, 490)
	replace `v' = 500 if inrange(`v', 500, 595)
	replace `v' = 600 if inrange(`v', 600, 690)
	replace `v' = 700 if inrange(`v', 700, 790)
	replace `v' = 810 if inrange(`v', 810, 840)
	replace `v' = 910 if inrange(`v', 910, 970)
	replace `v' = 999 if `v' > 970
	tab `v', m
}

* Value labels for the grouped codes, shared by both years.
label define vasocc 0 "Professional, Technical"
label define vasocc 100 "Farmers", add
label define vasocc 200 "Managers, Officials, and Proprietors", add
label define vasocc 300 "Clerical and Kindred", add
label define vasocc 400 "Sales workers", add
label define vasocc 500 "Craftsmen", add
label define vasocc 600 "Operatives", add
label define vasocc 700 "Service Workers", add
label define vasocc 810 "Farm Laborers", add
label define vasocc 910 "Laborers", add
label define vasocc 999 "Not yet classified", add
label values occ1950_A occ1950_B vasocc

* Skill indicators: groups 700/810/910 are low skilled, groups 0-600 are
* high skilled; group 999 is neither. Unsuffixed = _A year, 1940 = _B year.
local high_codes 0, 100, 200, 300, 400, 500, 600
gen byte low_skilled = (occ1950_A == 700 | occ1950_A == 810 | occ1950_A == 910)
gen byte low_skilled1940 = (occ1950_B == 700 | occ1950_B == 810 | occ1950_B == 910)
gen byte high_skilled  = inlist(occ1950_A, `high_codes')
gen byte high_skilled1940  = inlist(occ1950_B, `high_codes')

* Cross-checks of the indicators within and across years.
tab low_skilled high_skilled, m
tab low_skilled1940 high_skilled1940, m
tab low_skilled*, m
tab high_skilled*, m
tab high_skilled*, m

***************************
*** COUNT BY OCCUPATION ***
***************************
* Build a small table of 1930 occupation-group shares and export it to
* LaTeX. Everything runs between preserve/restore, so the person-level data
* in memory are unchanged afterwards.
preserve
* One row per occupation group, with pop = number of people in the group.
gen pop = 1
collapse (sum) pop, by(occ1950_A)
list
* Exclude group 100 (labelled "Farmers") and 999 ("Not yet classified")
* before computing shares, so shares are relative to the remaining groups.
drop if occ1950_A == 100 | occ1950_A == 999
* r(sum) must be read immediately after -sum-: it is the total of pop.
sum pop
gen share = pop / r(sum)
* Express the share in percent.
replace share = share * 100
* Largest group first.
gsort - share
format share %4.2f

list occ1950_A share pop
* Write the table as a LaTeX tabular; head()/foot() supply the wrapper.
listtex occ1950_A share pop using "~/occ_count.tex", replace rstyle(tabular) ///
	head({\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}\begin{tabular}{lcc}	\hline\hline \\) ///
	foot(\hline\hline\end{tabular}})
restore

	
***************************
*** INDUSTRY GROUPINGS ****
***************************

* Map the detailed industry code ind1950_A into 13 broad sectors. Each
* range test evaluates to 0 or 1 and the ranges do not overlap, so the sum
* is the sector number; a code that falls in none of the ranges yields 0 and
* is reported by the diagnostic tab below.
*
* FIX: the last term used to read (ind1950_A > 946 | ind1950_A). A bare
* variable is "true" for every nonzero value, so 13 was added to every
* classified industry (e.g. sector 1 became 14, with no value label).
* "Other" is now codes above 946 (missing included, since missing compares
* as larger than any number) plus code 0.
* NOTE(review): assumes 0 is the not-applicable industry code -- confirm
* against the codebook.
gen ind1950 = 1 * inrange(ind1950_A, 105, 126) + ///
	2 * inrange(ind1950_A, 206, 239) + ///
	3 * (ind1950_A == 246) + ///
	4 * inrange(ind1950_A, 306, 499) + ///
	5 * inrange(ind1950_A, 506, 598) + ///
	6 * inrange(ind1950_A, 606, 699) + ///
	7 * inrange(ind1950_A, 716, 756) + ///
	8 * inrange(ind1950_A, 806, 817) + ///
	9 * inrange(ind1950_A, 826, 849) + ///
	10 * inrange(ind1950_A, 856, 859) + ///
	11 * inrange(ind1950_A, 868, 899) + ///
	12 * inrange(ind1950_A, 906, 946) + ///
	13 * (ind1950_A > 946 | ind1950_A == 0)

label define inds_lbl 1 "Agriculture, Forestry, and Fishing" ///
	2 "Mining" ///
	3 "Construction" ///
	4 "Manufacturing" ///
	5 "Transportation, Communication, and Other Utilities" ///
	6 "Wholesale and Retail Trade" ///
	7 "Finance, Insurance, and Real Estate" ///
	8 "Business and Repair Services" ///
	9 "Personal services" ///
	10 "Entertainment and Recreation Services" ///
	11 "Professional and Related Services" ///
	12 "Public Administration" ///
	13 "Other"
label values ind1950 inds_lbl

tab ind1950, m
* Diagnostic: detailed codes that were not assigned to any sector.
tab ind1950_A if mi(ind1950) | ind1950 == 0

* The detailed code is no longer needed once the sector is built.
drop ind1950_A

*****************
*** OUTCOMES ****
***************** 

* EMPLOYMENT OUTCOMES
* dempl is a difference of two 0/1 indicators, so it takes values -1, 0, 1:
* 1940 employment is empstat codes 10-12, 1930 employment is code 10 only.
gen byte dempl = inrange(empstat_B, 10, 12) - (empstat_A == 10)
* Transition indicators between the two years.
gen byte dempl_1 = (empstat_A == 20) & inrange(empstat_B, 10, 12) // not employed -> employed
gen byte dempl_2 = (empstat_A == 10) & inrange(empstat_B, 20, 22) // employed -> not employed

label var dempl "empl 1940 - empl 1930 "
label var dempl_1 "unemployed -> employed"
label var dempl_2 "employed -> unemployed"

* Same idea using class of worker: codes 11-29 count as employed. In the
* second variable anything outside 11-29 in 1930, missing included, counts
* as not employed.
gen byte dempl_alternative = inrange(classwkr_B, 11, 29) - inrange(classwkr_A, 11, 29)
gen byte dempl_1alternative = !inrange(classwkr_A, 11, 29) & inrange(classwkr_B, 11, 29) // not employed -> employed

* WAGE OUTCOMES
* Change in the occupation-based wage. An employed -> employed restriction
* (if dempl_4 == 1) was noted in the original but is not applied.
gen dwage = wage30_1940 - wage30_1930

* Log ratio of occupational scores; forced to missing when either is zero.
gen doccscore = ln(occscore_B / occscore_A)
replace doccscore = . if inlist(0, occscore_A, occscore_B)

label var dwage "change in occ wage 1930-40" 
label var doccscore "log(occscore_B/occscore_A)"

* RURAL/URBAN OUTCOMES
* In both indicators, urban code 2 is treated as urban and 1 as rural.
gen byte drural_1 = (urban_B == 1 & urban_A == 2)  // "urban -> rural"
gen byte drural_2 = (urban_B == 2 & urban_A == 1)  // "rural -> urban"

label var drural_1 "urban -> rural"
label var drural_2 "rural -> urban"

* MIGRATION OUTCOMES
* Each stayer indicator is 1 when the person is in the same place at both
* dates and 0 otherwise.
gen byte stayer = statefip_A == migplac5_B   // same state 1930-35
* migplac5_B == 999 is treated as unknown, so the indicator is set to missing.
replace stayer = . if migplac5_B == 999
gen byte stayer2 = statefip_A == statefip_B  // same state 1930-40
* FIX: this used !=, which flagged movers as stayers. That contradicted the
* variable name, its label below, and the definitions of stayer/stayer2.
gen byte stayer3 = county == county40        // same county 1930-40

label var stayer "state 1930 = state 1935"
label var stayer2 "state 1930 = state 1940"
label var stayer3 "county 1930 = county 1940"

* SKILL OUTCOMES
* Entry into farming between the two years, using the grouped occupation
* codes (100 = Farmers, 810 = Farm Laborers).
gen byte dfarmer = (occ1950_B == 100) & (occ1950_A != 100)
gen byte dfarming = inlist(occ1950_B, 100, 810) & !inlist(occ1950_A, 100, 810)

* Change in the classwkr == 12 indicator: -1, 0 or 1.
gen byte dselfempl = (classwkr_B == 12) - (classwkr_A == 12)

* Disabled: changes in the high/low skill indicators.
*gen byte dhighskill = (high_skilled1940 == 1) - (high_skilled == 1)
*gen byte dlowskill = (low_skilled1940 == 1) - (low_skilled == 1)

label var dfarmer "not farmer -> farmer"
label var dfarming "not in farming -> farming"
label var dselfempl "self_empl 1940 - self_empl 1930"
*label var dhighskill "high_skill 1940 - high_skill 1930"
*label var dlowskill "low_skill 1940 - low_skill 1930"

************************
*** OTHER VARIABLES ****
************************
* 0/1 indicators built from the _A (1930) record.
gen byte married_A = inlist(marst_A, 1, 2)
gen byte literate_A = (lit_A == 4)
gen byte black = (race_A == 200)

*******************
*** DATA CHECK ****
*******************
* Summary statistics for the constructed variables.
* FIX: dhigh* was removed from the list. dhighskill is commented out above,
* so the pattern matched nothing and -summarize- stopped with "variable not
* found" before the file was saved.
* NOTE(review): rail* and migr* are not created in this script; presumably
* they come from aggregate_nber.dta -- confirm, otherwise they fail the
* same way.
sum demp* dwage drural* stayer* rail* married_A literate_A dfarm* dself* low* high* d* migr*

* Consistency of the employment-change variables with one another.
tab dempl, m
tab dempl dempl_1, m
tab dempl dempl_2, m
tab dempl_1 dempl_2, m

tab drural*, m
tab stayer, m
tab stayer2, m
tab stayer3, m
*tab dfarm*, m

tab dself, m
*tab dhigh, m
*tab dlow, m
*tab dhigh dlow, m

* Shrink storage types, print a final overview, and write the analysis file.
compress
sum

save "~/matched_30_40_ready.dta", replace
